{ "cells": [
 { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from scipy.stats import f_oneway\n", "from sklearn.metrics import mean_absolute_error \n", "import altair as alt" ] },
 { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [
  "# Load the raw student-performance table from the working directory.\n",
  "df = pd.read_csv('StudentPerformanceFactors.csv')"
 ] },
 { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [
  "# Remove rows with any missing value, then rows reporting exactly 8 tutoring sessions.\n",
  "# NOTE(review): the reason for excluding the value 8 is not recorded here - confirm.\n",
  "df = df.dropna().loc[lambda rows: rows['Tutoring_Sessions'] != 8]"
 ] },
 { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [
  "# Summary statistics of the exam scores; the mean and std define a three-sigma band.\n",
  "sum_stats = df['Exam_Score'].describe()\n",
  "score_mean, score_std = sum_stats['mean'], sum_stats['std']\n",
  "\n",
  "lower_bound = score_mean - 3 * score_std\n",
  "upper_bound = score_mean + 3 * score_std\n",
  "\n",
  "# Rows whose exam score lies strictly inside the band.\n",
  "# NOTE(review): the charts further down are built from `df`, not `filtered_df` -\n",
  "# confirm whether this outlier filter is meant to feed them.\n",
  "score_column = df['Exam_Score']\n",
  "filtered_df = df.loc[score_column.gt(lower_bound) & score_column.lt(upper_bound)]"
 ] },
 { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [
  "# Keep only the first 5000 rows.\n",
  "# NOTE(review): presumably to stay within Altair's default 5000-row limit - confirm.\n",
  "df = df.head(5000)"
 ] },
 { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [
  "# Columns of the dataset that hold categorical (text) values.\n",
  "categorical_columns = [\n",
  "    'Parental_Involvement',\n",
  "    'Access_to_Resources',\n",
  "    'Extracurricular_Activities',\n",
  "    'Motivation_Level',\n",
  "    'Internet_Access',\n",
  "    'Family_Income',\n",
  "    'Teacher_Quality',\n",
  "    'School_Type',\n",
  "    'Peer_Influence',\n",
  "    'Learning_Disabilities',\n",
  "    'Parental_Education_Level',\n",
  "    'Distance_from_Home',\n",
  "    'Gender',\n",
  "]"
 ] },
 { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Hours_StudiedAttendanceParental_InvolvementAccess_to_ResourcesExtracurricular_ActivitiesSleep_HoursPrevious_ScoresMotivation_LevelInternet_AccessTutoring_SessionsFamily_IncomeTeacher_QualitySchool_TypePeer_InfluencePhysical_ActivityLearning_DisabilitiesParental_Education_LevelDistance_from_HomeGenderExam_Score
02384LowHighNo773LowYes0LowMediumPublicPositive3NoHigh SchoolNearMale67
11964LowMediumNo859LowYes2MediumMediumPublicNegative4NoCollegeModerateFemale61
22498MediumMediumYes791MediumYes2MediumMediumPublicNeutral4NoPostgraduateNearMale74
32989LowMediumYes898MediumYes1MediumMediumPublicNegative4NoHigh SchoolModerateMale71
41992MediumMediumYes665MediumYes3MediumHighPublicNeutral4NoCollegeNearFemale70
...............................................................
51711274LowHighYes655HighYes0MediumLowPublicPositive3NoPostgraduateNearMale63
51721884HighLowNo664MediumYes2LowHighPublicPositive2NoHigh SchoolNearFemale67
51731482MediumMediumYes467MediumYes1HighMediumPublicNeutral4NoHigh SchoolModerateFemale65
51742376MediumMediumNo766MediumYes2MediumMediumPublicNeutral3NoCollegeNearMale67
51761269HighHighYes753LowYes2LowHighPublicNeutral3NoCollegeNearMale64
\n", "

5000 rows × 20 columns

\n", "
" ], "text/plain": [ " Hours_Studied Attendance Parental_Involvement Access_to_Resources \\\n", "0 23 84 Low High \n", "1 19 64 Low Medium \n", "2 24 98 Medium Medium \n", "3 29 89 Low Medium \n", "4 19 92 Medium Medium \n", "... ... ... ... ... \n", "5171 12 74 Low High \n", "5172 18 84 High Low \n", "5173 14 82 Medium Medium \n", "5174 23 76 Medium Medium \n", "5176 12 69 High High \n", "\n", " Extracurricular_Activities Sleep_Hours Previous_Scores \\\n", "0 No 7 73 \n", "1 No 8 59 \n", "2 Yes 7 91 \n", "3 Yes 8 98 \n", "4 Yes 6 65 \n", "... ... ... ... \n", "5171 Yes 6 55 \n", "5172 No 6 64 \n", "5173 Yes 4 67 \n", "5174 No 7 66 \n", "5176 Yes 7 53 \n", "\n", " Motivation_Level Internet_Access Tutoring_Sessions Family_Income \\\n", "0 Low Yes 0 Low \n", "1 Low Yes 2 Medium \n", "2 Medium Yes 2 Medium \n", "3 Medium Yes 1 Medium \n", "4 Medium Yes 3 Medium \n", "... ... ... ... ... \n", "5171 High Yes 0 Medium \n", "5172 Medium Yes 2 Low \n", "5173 Medium Yes 1 High \n", "5174 Medium Yes 2 Medium \n", "5176 Low Yes 2 Low \n", "\n", " Teacher_Quality School_Type Peer_Influence Physical_Activity \\\n", "0 Medium Public Positive 3 \n", "1 Medium Public Negative 4 \n", "2 Medium Public Neutral 4 \n", "3 Medium Public Negative 4 \n", "4 High Public Neutral 4 \n", "... ... ... ... ... \n", "5171 Low Public Positive 3 \n", "5172 High Public Positive 2 \n", "5173 Medium Public Neutral 4 \n", "5174 Medium Public Neutral 3 \n", "5176 High Public Neutral 3 \n", "\n", " Learning_Disabilities Parental_Education_Level Distance_from_Home \\\n", "0 No High School Near \n", "1 No College Moderate \n", "2 No Postgraduate Near \n", "3 No High School Moderate \n", "4 No College Near \n", "... ... ... ... \n", "5171 No Postgraduate Near \n", "5172 No High School Near \n", "5173 No High School Moderate \n", "5174 No College Near \n", "5176 No College Near \n", "\n", " Gender Exam_Score \n", "0 Male 67 \n", "1 Female 61 \n", "2 Male 74 \n", "3 Male 71 \n", "4 Female 70 \n", "... ... 
... \n", "5171 Male 63 \n", "5172 Female 67 \n", "5173 Female 65 \n", "5174 Male 67 \n", "5176 Male 64 \n", "\n", "[5000 rows x 20 columns]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "" ], "text/plain": [ "alt.HConcatChart(...)" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Slider covering the observed exam-score range in whole-point steps.\n",
  "slider = alt.binding_range(\n",
  "    name=\"Exam Score\",\n",
  "    min=df['Exam_Score'].min(),\n",
  "    max=df['Exam_Score'].max(),\n",
  "    step=1,\n",
  ")\n",
  "\n",
  "# Point selection on Exam_Score whose value is driven by the slider.\n",
  "selection = alt.selection_point(fields=['Exam_Score'], bind=slider)\n",
  "\n",
  "# Channel definitions for the first scatter plot.\n",
  "sleep_axis = alt.X('Sleep_Hours:Q', scale=alt.Scale(domain=[4, 10]), title='Sleep Hours')\n",
  "score_axis = alt.Y('Exam_Score:Q', scale=alt.Scale(domain=[50, 101]), title='Exam Score')\n",
  "teacher_color = alt.Color('Teacher_Quality:N', title='Teacher Quality')\n",
  "hover_fields = ['Sleep_Hours', 'Exam_Score', 'Hours_Studied', 'Teacher_Quality']\n",
  "\n",
  "# Sleep hours vs. exam score, filtered by the slider selection.\n",
  "chart1 = (\n",
  "    alt.Chart(df)\n",
  "    .transform_filter(selection)\n",
  "    .mark_circle()\n",
  "    .encode(sleep_axis, score_axis, color=teacher_color, tooltip=hover_fields)\n",
  "    .add_params(selection)\n",
  "    .properties(title=\"Sleep Hours Vs Exam Scores Segmented by Teacher Quality\")\n",
  ")\n",
  "\n",
  "# Same chart with the x channel swapped to hours studied and its own title.\n",
  "chart2 = chart1.encode(x='Hours_Studied:Q').properties(\n",
  "    title=\"Hours Studied Compared To Exam Scores Labeled By Teacher Quality\"\n",
  ")\n",
  "\n",
  "# Show both charts side by side.\n",
  "chart1 | chart2"
 ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\sunit\\AppData\\Local\\Temp\\ipykernel_22008\\1167229257.py:1: AltairDeprecationWarning: Deprecated in `altair=5.0.0`. Use selection_point instead.\n", " selection = alt.selection_multi(on='mouseover')\n", "C:\\Users\\sunit\\AppData\\Local\\Temp\\ipykernel_22008\\1167229257.py:10: AltairDeprecationWarning: Deprecated in `altair=5.0.0`. 
Use add_params instead.\n", " ).add_selection(\n" ] } ], "source": [
  "# Hover-driven point selection.\n",
  "# selection_point / add_params replace selection_multi / add_selection, which\n",
  "# Altair 5.0.0 deprecated (see the AltairDeprecationWarning this cell used to emit).\n",
  "selection = alt.selection_point(on='mouseover')\n",
  "\n",
  "# Attendance vs. exam score. Points in the selection are coloured by\n",
  "# Access_to_Resources; all other points are drawn light gray.\n",
  "chart = alt.Chart(df).mark_circle().encode(\n",
  "    x=alt.X('Attendance', scale=alt.Scale(type='linear', domain=[60, 100])),\n",
  "    y=alt.Y('Exam_Score', scale=alt.Scale(type='linear', domain=[50, 105]), title=\"Exam Scores\"),\n",
  "    color=alt.condition(selection, 'Access_to_Resources', alt.value('lightgray'), title=\"Access To Resources\"),\n",
  "    size=alt.value(100),\n",
  "    tooltip=['Attendance', 'Exam_Score', 'Access_to_Resources', 'Family_Income']\n",
  ").add_params(\n",
  "    selection\n",
  ")\n"
 ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "\n", "
\n", "" ], "text/plain": [ "alt.Chart(...)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Resource levels offered by the radio buttons.\n",
  "access_to_resources = ['High', 'Medium', 'Low']\n",
  "\n",
  "# Radio-button widget bound to a point selection on Access_to_Resources.\n",
  "rating_res = alt.binding_radio(\n",
  "    options=access_to_resources,\n",
  "    name=\"Access to Resources\"\n",
  ")\n",
  "\n",
  "rating_select = alt.selection_point(\n",
  "    fields=['Access_to_Resources'],\n",
  "    bind=rating_res\n",
  ")\n",
  "\n",
  "# Selected points keep their resource colour. The legend title is set explicitly\n",
  "# because this encoding replaces the hover chart's colour channel, which carried\n",
  "# the title; without it the legend would show the raw column name.\n",
  "rating_color_condition = alt.condition(\n",
  "    rating_select,\n",
  "    alt.Color('Access_to_Resources:N', title=\"Access To Resources\"),\n",
  "    alt.value('lightgray')\n",
  ")\n",
  "\n",
  "# Selected points are fully opaque; all others are fully transparent.\n",
  "opacity_condition = alt.condition(\n",
  "    rating_select,\n",
  "    alt.value(1),\n",
  "    alt.value(0)\n",
  ")\n",
  "\n",
  "# Layer the radio selection onto the hover chart built in the previous cell.\n",
  "highlight_ratings = chart.add_params(\n",
  "    rating_select\n",
  ").encode(\n",
  "    color=rating_color_condition,\n",
  "    opacity=opacity_condition\n",
  ").properties(title=\"Attendance Vs Exam Scores Segmented By Access To Resources\")\n",
  "\n",
  "\n",
  "highlight_ratings"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 2 }